<!DOCTYPE html>
<html lang=en>
<head>
  <meta charset="utf-8">
  
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="renderer" content="webkit">
  <meta http-equiv="Cache-Control" content="no-transform" />
  <meta http-equiv="Cache-Control" content="no-siteapp" />
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black">
  <meta name="format-detection" content="telephone=no,email=no,address=no">
  <!-- Color theme for statusbar -->
  <meta name="theme-color" content="#000000" />
  <!-- 强制页面在当前窗口以独立页面显示,防止别人在框架里调用页面 -->
  <meta http-equiv="window-target" content="_top" />
  
  
  <title>ObjectDetection(1)_RCNN | 鲨鱼之家</title>
  <meta name="description" content="概述 Regions Proposals  首先面临的两个问题及其解决方法 怎么将数据集中任意大小的图片转换为要求大小的图片 怎么从region proposals提取固定大小的输入送到CNN中 边界框定位的关键性和一个改进方法   整个RCNN模型  整个模型包含了3个模块   训练  训练过程 再训练一个检测分类器的必要性   特征可视化 消融实验Ablation studies 错误分析">
<meta property="og:type" content="article">
<meta property="og:title" content="ObjectDetection(1)_RCNN">
<meta property="og:url" content="http://tina-yao.gitee.io/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/index.html">
<meta property="og:site_name" content="大鲨鱼">
<meta property="og:description" content="概述 Regions Proposals  首先面临的两个问题及其解决方法 怎么将数据集中任意大小的图片转换为要求大小的图片 怎么从region proposals提取固定大小的输入送到CNN中 边界框定位的关键性和一个改进方法   整个RCNN模型  整个模型包含了3个模块   训练  训练过程 再训练一个检测分类器的必要性   特征可视化 消融实验Ablation studies 错误分析">
<meta property="og:locale" content="en_US">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/1.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/2.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/3.png">
<meta property="article:published_time" content="2022-08-15T01:47:59.000Z">
<meta property="article:modified_time" content="2022-08-15T01:55:35.027Z">
<meta property="article:author" content="BigbigShark">
<meta property="article:tag" content="目标检测">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/1.png">
  <!-- Canonical links -->
  <link rel="canonical" href="http://tina-yao.gitee.io/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/index.html">
  
    <link rel="alternate" href="/atom.xml" title="大鲨鱼" type="application/atom+xml">
  
  
    <link rel="icon" href="/bigbig-shark/favicon.png" type="image/png">
  
  
<link rel="stylesheet" href="/bigbig-shark/css/style.css">

  
  
  
  
<meta name="generator" content="Hexo 5.4.0"></head>


<body class="main-center theme-purple" itemscope itemtype="http://schema.org/WebPage">
  <header class="header" itemscope itemtype="http://schema.org/WPHeader">
  <div class="slimContent">
    <div class="navbar-header">
      
      
      <div class="profile-block text-center">
        <a id="avatar" href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank">
          <img class="img-circle img-rotate" src="/bigbig-shark/images/avatar.jpg" alt="大鲨鱼" width="200" height="200">
        </a>
        <h2 id="name" class="hidden-xs hidden-sm">大鲨鱼</h2>
        <h3 id="title" class="hidden-xs hidden-sm hidden-md">CV&amp;Robots</h3>
        <small id="location" class="text-muted hidden-xs hidden-sm"><i class="icon icon-map-marker"></i> Wuhan, China</small>
      </div>
      
      <div class="search" id="search-form-wrap">

    <form class="search-form sidebar-form">
        <div class="input-group">
            <input type="text" class="search-form-input form-control" placeholder="Search" />
            <span class="input-group-btn">
                <button type="submit" class="search-form-submit btn btn-flat" onclick="return false;"><i class="icon icon-search"></i></button>
            </span>
        </div>
    </form>
    <div class="ins-search">
  <div class="ins-search-mask"></div>
  <div class="ins-search-container">
    <div class="ins-input-wrapper">
      <input type="text" class="ins-search-input" placeholder="Type something..." x-webkit-speech />
      <button type="button" class="close ins-close ins-selectable" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">×</span></button>
    </div>
    <div class="ins-section-wrapper">
      <div class="ins-section-container"></div>
    </div>
  </div>
</div>


</div>
      <button class="navbar-toggle collapsed" type="button" data-toggle="collapse" data-target="#main-navbar" aria-controls="main-navbar" aria-expanded="false">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
    </div>
    <nav id="main-navbar" class="collapse navbar-collapse" itemscope itemtype="http://schema.org/SiteNavigationElement" role="navigation">
      <ul class="nav navbar-nav main-nav menu-highlight">
        
        
        <li class="menu-item menu-item-home">
          <a href="/bigbig-shark/.">
            
            <i class="icon icon-home-fill"></i>
            
            <span class="menu-title">Home</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-archives">
          <a href="/bigbig-shark/archives">
            
            <i class="icon icon-archives-fill"></i>
            
            <span class="menu-title">Archives</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-categories">
          <a href="/bigbig-shark/categories">
            
            <i class="icon icon-folder"></i>
            
            <span class="menu-title">Categories</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-tags">
          <a href="/bigbig-shark/tags">
            
            <i class="icon icon-tags"></i>
            
            <span class="menu-title">Tags</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-links">
          <a href="/bigbig-shark/links">
            
            <i class="icon icon-friendship"></i>
            
            <span class="menu-title">Links</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-about">
          <a href="/bigbig-shark/about">
            
            <i class="icon icon-cup-fill"></i>
            
            <span class="menu-title">About</span>
          </a>
        </li>
        
      </ul>
      
	
    <ul class="social-links">
    	
        <li><a href="https://gitee.com/tina-yao" target="_blank" title="Gitee" data-toggle=tooltip data-placement=top><i class="icon icon-gitee"></i></a></li>
        
    </ul>

    </nav>
  </div>
</header>

  
    <aside class="sidebar" itemscope itemtype="http://schema.org/WPSideBar">
  <div class="slimContent">
    
      <div class="widget">
    <h3 class="widget-title">Board</h3>
    <div class="widget-body">
        <div id="board">
            <div class="content">
                <p>欢迎交流与分享经验!</p>
            </div>
        </div>
    </div>
</div>

    
      
  <div class="widget">
    <h3 class="widget-title">Categories</h3>
    <div class="widget-body">
      <ul class="category-list"><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/AI/">AI</a><span class="category-list-count">1</span><ul class="category-list-child"><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/AI/ML/">ML</a><span class="category-list-count">1</span></li></ul></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/Concepts/">Concepts</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/Robots/">Robots</a><span class="category-list-count">2</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E5%8D%8A%E6%97%A5%E9%97%B2/">半日闲</a><span class="category-list-count">5</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1/">数学建模</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90/">数据分析</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%89/">计算机视觉</a><span class="category-list-count">30</span></li></ul>
    </div>
  </div>


    
      
  <div class="widget">
    <h3 class="widget-title">Tag Cloud</h3>
    <div class="widget-body tagcloud">
      <a href="/bigbig-shark/tags/CNN-backbones/" style="font-size: 13.67px;">CNN_backbones</a> <a href="/bigbig-shark/tags/OpenCV/" style="font-size: 13px;">OpenCV</a> <a href="/bigbig-shark/tags/Python/" style="font-size: 13px;">Python</a> <a href="/bigbig-shark/tags/ROS/" style="font-size: 13px;">ROS</a> <a href="/bigbig-shark/tags/%E4%BB%A3%E7%A0%81/" style="font-size: 13px;">代码</a> <a href="/bigbig-shark/tags/%E5%85%83%E5%AE%87%E5%AE%99/" style="font-size: 13px;">元宇宙</a> <a href="/bigbig-shark/tags/%E5%85%B4%E8%B6%A3/" style="font-size: 13px;">兴趣</a> <a href="/bigbig-shark/tags/%E5%9C%BA%E6%99%AF%E6%96%87%E5%AD%97%E8%AF%86%E5%88%AB/" style="font-size: 13px;">场景文字识别</a> <a href="/bigbig-shark/tags/%E6%91%98%E6%8A%84/" style="font-size: 13.33px;">摘抄</a> <a href="/bigbig-shark/tags/%E6%95%B0%E6%8D%AE%E9%9B%86/" style="font-size: 13px;">数据集</a> <a href="/bigbig-shark/tags/%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B/" style="font-size: 14px;">目标检测</a> <a href="/bigbig-shark/tags/%E7%BE%8E%E8%B5%9B/" style="font-size: 13px;">美赛</a> <a href="/bigbig-shark/tags/%E8%81%9A%E7%B1%BB/" style="font-size: 13px;">聚类</a> <a href="/bigbig-shark/tags/%E8%87%AA%E5%8A%A8%E9%A9%BE%E9%A9%B6/" style="font-size: 13.33px;">自动驾驶</a> <a href="/bigbig-shark/tags/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%893D/" style="font-size: 13px;">计算机视觉3D</a> <a href="/bigbig-shark/tags/%E8%BD%BB%E9%87%8F%E7%BA%A7/" style="font-size: 13px;">轻量级</a>
    </div>
  </div>

    
  </div>
</aside>

  
  
<main class="main" role="main">
  <div class="content">
  <article id="post-ObjectDetection-1-RCNN" class="article article-type-post" itemscope itemtype="http://schema.org/BlogPosting">
    
    <div class="article-header">
      
        
  
    <h1 class="article-title" itemprop="name">
      ObjectDetection(1)_RCNN
    </h1>
  

      
      <div class="article-meta">
        <span class="article-date">
    <i class="icon icon-calendar-check"></i>
	<a href="/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/" class="article-date">
	  <time datetime="2022-08-15T01:47:59.000Z" itemprop="datePublished">2022-08-15</time>
	</a>
</span>
        
  <span class="article-category">
    <i class="icon icon-folder"></i>
    <a class="article-category-link" href="/bigbig-shark/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%89/">计算机视觉</a>
  </span>

        
  <span class="article-tag">
    <i class="icon icon-tags"></i>
	<a class="article-tag-link-link" href="/bigbig-shark/tags/%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B/" rel="tag">目标检测</a>
  </span>


        

	<span class="article-read hidden-xs">
    	<i class="icon icon-eye-fill" aria-hidden="true"></i>
    	<span id="/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/" class="leancloud_visitors"  data-flag-title="ObjectDetection(1)_RCNN">
			<span class="leancloud-visitors-count">0</span>
		</span>
    </span>

        <span class="post-comment"><i class="icon icon-comment"></i> <a href="/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/#comments" class="article-comment-link">Comments</a></span>
        
	
		<span class="post-wordcount hidden-xs" itemprop="wordCount">Word Count: 1.9k(words)</span>
	
	
		<span class="post-readcount hidden-xs" itemprop="timeRequired">Read Count: 7(minutes)</span>
	

      </div>
    </div>
    <div class="article-entry marked-body" itemprop="articleBody">
      
        <!-- toc -->
<ul>
<li><a href="#%E6%A6%82%E8%BF%B0">概述</a></li>
<li><a href="#regions-proposals">Regions Proposals</a>
<ul>
<li><a href="#%E9%A6%96%E5%85%88%E9%9D%A2%E4%B8%B4%E7%9A%84%E4%B8%A4%E4%B8%AA%E9%97%AE%E9%A2%98%E5%8F%8A%E5%85%B6%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95">首先面临的两个问题及其解决方法</a></li>
<li><a href="#%E6%80%8E%E4%B9%88%E5%B0%86%E6%95%B0%E6%8D%AE%E9%9B%86%E4%B8%AD%E4%BB%BB%E6%84%8F%E5%A4%A7%E5%B0%8F%E7%9A%84%E5%9B%BE%E7%89%87%E8%BD%AC%E6%8D%A2%E4%B8%BA%E8%A6%81%E6%B1%82%E5%A4%A7%E5%B0%8F%E7%9A%84%E5%9B%BE%E7%89%87">怎么将数据集中任意大小的图片转换为要求大小的图片</a></li>
<li><a href="#%E6%80%8E%E4%B9%88%E4%BB%8Eregion-proposals%E6%8F%90%E5%8F%96%E5%9B%BA%E5%AE%9A%E5%A4%A7%E5%B0%8F%E7%9A%84%E8%BE%93%E5%85%A5%E9%80%81%E5%88%B0cnn%E4%B8%AD">怎么从region proposals提取固定大小的输入送到CNN中</a></li>
<li><a href="#%E8%BE%B9%E7%95%8C%E6%A1%86%E5%AE%9A%E4%BD%8D%E7%9A%84%E5%85%B3%E9%94%AE%E6%80%A7%E5%92%8C%E4%B8%80%E4%B8%AA%E6%94%B9%E8%BF%9B%E6%96%B9%E6%B3%95">边界框定位的关键性和一个改进方法</a></li>
</ul>
</li>
<li><a href="#%E6%95%B4%E4%B8%AArcnn%E6%A8%A1%E5%9E%8B">整个RCNN模型</a>
<ul>
<li><a href="#%E6%95%B4%E4%B8%AA%E6%A8%A1%E5%9E%8B%E5%8C%85%E5%90%AB%E4%BA%863%E4%B8%AA%E6%A8%A1%E5%9D%97">整个模型包含了3个模块</a></li>
</ul>
</li>
<li><a href="#%E8%AE%AD%E7%BB%83">训练</a>
<ul>
<li><a href="#%E8%AE%AD%E7%BB%83%E8%BF%87%E7%A8%8B">训练过程</a></li>
<li><a href="#%E5%86%8D%E8%AE%AD%E7%BB%83%E4%B8%80%E4%B8%AA%E6%A3%80%E6%B5%8B%E5%88%86%E7%B1%BB%E5%99%A8%E7%9A%84%E5%BF%85%E8%A6%81%E6%80%A7">再训练一个检测分类器的必要性</a></li>
</ul>
</li>
<li><a href="#%E7%89%B9%E5%BE%81%E5%8F%AF%E8%A7%86%E5%8C%96">特征可视化</a></li>
<li><a href="#%E6%B6%88%E8%9E%8D%E5%AE%9E%E9%AA%8Cablation-studies">消融实验Ablation studies</a></li>
<li><a href="#%E9%94%99%E8%AF%AF%E5%88%86%E6%9E%90">错误分析</a></li>
<li><a href="#%E4%BF%AE%E6%AD%A3">修正</a></li>
<li><a href="#%E5%9C%A8%E8%AF%AD%E4%B9%89%E5%88%86%E5%89%B2%E4%B8%8A%E7%9A%84%E5%BA%94%E7%94%A8">在语义分割上的应用</a></li>
<li><a href="#%E8%B4%A1%E7%8C%AE">贡献</a></li>
</ul>
<!-- tocstop -->
<h2><span id="概述">概述</span></h2>
<p><strong>文章标题</strong>：丰富的特征层次用于准确的物体检测和语义分割 (Rich feature hierarchies for accurate object detection and semantic segmentation)</p>
<p><strong>一句话搞懂RCNN</strong>：Regions with CNN features. 具体地讲，就是：</p>
<ol>
<li>在某张图上提取若干个region proposals；</li>
<li>然后使用CNN从每个region proposal上分别提取特征，使用该特征训练分类器。</li>
</ol>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/1.png" alt="image-20220813160203438"></p>
<p><strong>背景</strong>：</p>
<ol>
<li>过去十年，在视觉识别上的研究和进展主要是基于<a target="_blank" rel="noopener" href="https://baike.baidu.com/item/SIFT/1396275">SIFT</a>（Scale-invariant feature transform，尺度不变特征变换）和<a target="_blank" rel="noopener" href="https://baike.baidu.com/item/HOG">HOG</a>（Histograms of Oriented Gradients，方向梯度直方图），并在此基础上进行模型集成和变种。但在2010到2012年间，集成模型和变种带来的提升微乎其微。</li>
<li>大脑处理视觉信息是有好几个层次的，所以是否存在多阶段层次处理的方法，找到包含信息更多的特征。</li>
<li>CNN在分类上取得了重大突破，如果将CNN应用到目标检测上，效果如何。</li>
</ol>
<blockquote>
<p>CNN本质上也是在提取特征，只是和传统方法SIFT和HOG等提取出来的特征有区别而已（更丰富更突出更完整等）</p>
<p><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/22476595">SIFT具体步骤和理解</a></p>
</blockquote>
<p><strong>实验内容</strong>：如何将CNN应用到目标检测上，应该需要做些什么处理，结果怎么样。</p>
<h2><span id="regions-proposals">Regions Proposals</span></h2>
<h3><span id="首先面临的两个问题及其解决方法">首先面临的两个问题及其解决方法</span></h3>
<p><strong>问题</strong>：已经明确CNN是用于特征提取的，而在使用CNN提取特征之前，还需要解决两个问题：</p>
<ol>
<li>如何找到region proposals，即如何用深度网络对图片中的物体进行定位(localizing objects with a deep network)
<ul>
<li>之前有方法将定位看作回归问题，但有前人实验发现该方法效果不好</li>
<li>另外一个选择是sliding-window detector（滑动窗口探测器），但新的问题也随之而来，作者的CNN网络比较深且在输入图片上采用了很大的感受野和步长，单纯地使用滑动窗口会产生精度损失问题。</li>
</ul>
</li>
<li>如何通过少量稀疏的数据训练出能力强大的模型(training a high-capacity model with only a small quantity of annotated detection data)
<ul>
<li>传统方法是使用无监督预训练，然后在训练集上进行微调</li>
</ul>
</li>
</ol>
<p><strong>解决</strong>：</p>
<ol>
<li>使用<a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/39927488">selective search</a></li>
<li>在额外的大型数据集（比如ILSVRC）上进行有监督预训练，然后在小训练集（比如PASCAL）上对特定部分的参数进行微调，是非常有效果的。(Supervised pre-training on a large auxiliary dataset, followed by domain-specific fine-tuning on a small data, is an effective paradigm for learning high-capacity CNNs when data is scarce)</li>
</ol>
<h3><span id="怎么将数据集中任意大小的图片转换为要求大小的图片">怎么将数据集中任意大小的图片转换为要求大小的图片</span></h3>
<ol>
<li>We warp all pixels in a tight bounding box around it to the required size.</li>
<li>Prior to warping, we dilate the tight bounding box so that at the warped size there are exactly p (=16) pixels of warped image context around the original box.</li>
</ol>
<h3><span id="怎么从region-proposals提取固定大小的输入送到cnn中">怎么从region proposals提取固定大小的输入送到CNN中</span></h3>
<p>使用了一种叫做<strong>affine image warping</strong>的简单技术，to compute a fixed-size CNN input from each region proposal, regardless of the region’s shape</p>
<h3><span id="边界框定位的关键性和一个改进方法">边界框定位的关键性和一个改进方法</span></h3>
<p>A simple bounding box regression method significantly reduces mislocalizations, which are the dominant error mode.</p>
<h2><span id="整个rcnn模型">整个RCNN模型</span></h2>
<h3><span id="整个模型包含了3个模块">整个模型包含了3个模块</span></h3>
<ol>
<li>The first generates category-independent region proposals. These proposals define the set of candidate detections available to our detector.
<ul>
<li>测试时采用的方法：Fast mode Selective Search to extract around 2000 Region Proposals.</li>
</ul>
</li>
<li>The second module is a large convolutional neural network that extracts a fixed-length feature vector from each region.
<ul>
<li>CNN(AlexNet: 5 conv-layers + 2 fc-layers) for feature extraction.</li>
<li>Input: 227 × 227 RGB image</li>
<li>Output: 4096-d feature vector</li>
</ul>
</li>
<li>The third module is a set of class-specific linear SVMs.</li>
</ol>
<h2><span id="训练">训练</span></h2>
<h3><span id="训练过程">训练过程</span></h3>
<ol>
<li>Supervised training on a large auxiliary dataset (lr=0.01)</li>
<li>Domain-specific fine-tuning (注意此处using only warped region proposals, lr=0.001)</li>
<li>Object category classifiers (注意IoU overlap的threshold是学习到的，was selected by a grid search over {0, 0.1, …, 0.5} on a validation set，而且其值的选择很重要，对结果影响很大)
<ul>
<li>One linear SVM per class.</li>
<li>Since the training data is too large to fit in memory, we adopt the <a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/57440670">standard hard negative mining</a> method.</li>
</ul>
</li>
</ol>
<h3><span id="再训练一个检测分类器的必要性">再训练一个检测分类器的必要性</span></h3>
<p>即为什么不直接使用最后一个线性层的输出，而还要再训练一个检测分类器（真正的detector），以最后线性层的输出作为分类器的输入，再进行训练？</p>
<h2><span id="特征可视化">特征可视化</span></h2>
<p>该实验将最后一个max-pooling层输出的特征进行了可视化，来表现神经网络到底学到了什么，由此也证明了CNN作为特征提取器，可以学习到非常丰富的层次的特征；进而后面的线性层能够利用这些特征充分学习（<font color="red">进一步提出问题，为什么还要使用SVMs，看到最后也没看明白，我自己将其理解成利用boosting思想来提高模型表现</font>）。</p>
<blockquote>
<p><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/65888174">常用的模型集成方法介绍</a></p>
</blockquote>
<h2><span id="消融实验ablation-studies">消融实验Ablation studies</span></h2>
<p>为了明白哪些层在检测任务中起到关键作用，对最后两层进行了评估。</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/2.png" alt="image-20220813210252580"></p>
<p>从上表发现：</p>
<ol>
<li>没有微调时，fc7反而没有fc6收敛得好，甚至可以把fc6和fc7都丢掉（我感觉这个很正常，因为全连接层的本质就是映射和拟合，换了数据集之后，新的映射肯定和原来不一样；从微调（FT）后的实验结果就可以看出来，fc7的结果明显比fc6好）</li>
<li>微调后的提升是显著的，尤其是对fc6和fc7的提升（这不废话吗）</li>
<li>微调前后，pool5的提升没有fc6和fc7的提升显著（差得有点远），说明pool5输出的特征是泛化得比较好的，即CNN提取特征是比较通用的，而模型表现的提升最主要还是靠CNN后的非线性层来作用。</li>
<li>和其他提取特征的方法相比，CNN表现得好很多。</li>
</ol>
<h2><span id="错误分析">错误分析</span></h2>
<p>涉及到论文<a href="https://link.springer.com/content/pdf/10.1007%2F978-3-642-33712-3_25.pdf">Diagnosing Error in Object Detectors</a></p>
<h2><span id="修正">修正</span></h2>
<p>基于错误分析，选择了使用bounding box regression，依据CNN输出的特征图，来预测新的detection windows。实验证明，该方法可以修正大量的错误定位的检测。</p>
<h2><span id="在语义分割上的应用">在语义分割上的应用</span></h2>
<p>语义分割的一项标准技术是区域分类，这使得RCNN可以成功应用于语义分割（当时表现最好的模型是O<sub>2</sub>P，该模型的表现增益来自于CPMC regions和powerful second-order pooling of multiple feature types）。</p>
<p>CNN提取特征也是有技巧的，作者团队发现使用full image + a region’s foreground mask (前景)时，效果最好，如下图所示：</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/RCNNimg/3.png" alt="image-20220815092342097"></p>
<p>同时，可以发现，layer fc6 always outperforms fc7，所以直接分析fc6：</p>
<ul>
<li>使用fg的效果略好于full，indicating that the masked region shape provides a stronger signal, matching our intuition.</li>
<li>而full + fg的效果最好，说明两者互补，full包含了丰富的信息。</li>
</ul>
<h2><span id="贡献">贡献</span></h2>
<ol>
<li>首先展现和证明了CNN用在目标检测上可以有显著成果（作为特征提取器可以提取到层次丰富的特征）。</li>
<li>证明了在大型训练集上进行有监督预训练，再到小训练集上进行微调是非常有效的。</li>
</ol>

      
    </div>
    <div class="article-footer">
      <blockquote class="mt-2x">
  <ul class="post-copyright list-unstyled">
    
    <li class="post-copyright-link hidden-xs">
      <strong>本文链接：</strong>
      <a href="http://tina-yao.gitee.io/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/" title="ObjectDetection(1)_RCNN" target="_blank" rel="external">http://tina-yao.gitee.io/bigbig-shark/2022/08/15/ObjectDetection-1-RCNN/</a>
    </li>
    
    <li class="post-copyright-license">
      <strong>版权声明： </strong> 本博客所有文章除特别声明外，均采用 <a href="http://creativecommons.org/licenses/by/4.0/deed.zh" target="_blank" rel="external">CC BY 4.0 CN</a> 许可协议。转载请注明出处！
    </li>
  </ul>
</blockquote>


<div class="panel panel-default panel-badger">
  <div class="panel-body">
    <figure class="media">
      <div class="media-left">
        <a href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank" class="img-burn thumb-sm visible-lg">
          <img src="/bigbig-shark/images/avatar.jpg" class="img-rounded w-full" alt="">
        </a>
      </div>
      <div class="media-body">
        <h3 class="media-heading"><a href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank"><span class="text-dark">大鲨鱼</span><small class="ml-1x">CV&amp;Robots</small></a></h3>
        <div>格物致知，诚意力行。</div>
      </div>
    </figure>
  </div>
</div>


    </div>
  </article>
  
    
  <section id="comments">
  	
      <div id="vcomments"></div>
    
  </section>


  
</div>

  <nav class="bar bar-footer clearfix" data-stick-bottom>
  <div class="bar-inner">
  
  <ul class="pager pull-left">
    
    <li class="prev">
      <a href="/bigbig-shark/2022/08/15/ObjectDetection-2-SPP/" title="ObjectDetection(2)_SPP"><i class="icon icon-angle-left" aria-hidden="true"></i><span>&nbsp;&nbsp;Newer</span></a>
    </li>
    
    
    <li class="next">
      <a href="/bigbig-shark/2022/08/06/%E4%BD%BF%E7%94%A8ResNet34%E5%92%8CMobileNetV2%E5%AD%A6%E4%B9%A0CIFAR10%E6%95%B0%E6%8D%AE%E9%9B%86/" title="使用ResNet34和MobileNetV2学习CIFAR10数据集"><span>Older&nbsp;&nbsp;</span><i class="icon icon-angle-right" aria-hidden="true"></i></a>
    </li>
    
    
  </ul>
  
  
  <!-- Button trigger modal -->
  <button type="button" class="btn btn-fancy btn-donate pop-onhover bg-gradient-warning" data-toggle="modal" data-target="#donateModal"><span>$</span></button>
  <!-- <div class="wave-icon wave-icon-danger btn-donate" data-toggle="modal" data-target="#donateModal">
    <div class="wave-circle"><span class="icon"><i class="icon icon-bill"></i></span></div>
  </div> -->
  
  
  <div class="bar-right">
    
    <div class="share-component" data-sites="weibo,qq,wechat" data-mobile-sites="weibo,qq"></div>
    
  </div>
  </div>
</nav>
  
<!-- Modal -->
<div class="modal modal-center modal-small modal-xs-full fade" id="donateModal" tabindex="-1" role="dialog">
  <div class="modal-dialog" role="document">
    <div class="modal-content donate">
      <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">&times;</span></button>
      <div class="modal-body">
        <div class="donate-box">
          <div class="donate-head">
            <p>Maybe you could buy me a cup of coffee.</p>
          </div>
          <div class="tab-content">
            <div role="tabpanel" class="tab-pane fade active in" id="alipay">
              <div class="donate-payimg">
                <img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/images/donate/alipayimg.PNG#images/donate/alipayimg.png" alt="Scan Qrcode" title="Scan" />
              </div>
              <p class="text-muted mv">Scan this qrcode</p>
              <p class="text-grey">Open alipay app scan this qrcode, buy me a coffee!</p>
            </div>
            <div role="tabpanel" class="tab-pane fade" id="wechatpay">
              <div class="donate-payimg">
                <img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/images/donate/wechatpayimg.PNG#images/donate/wechatpayimg.png" alt="Scan Qrcode" title="Scan" />
              </div>
              <p class="text-muted mv">Scan this qrcode</p>
              <p class="text-grey">Open wechat app scan this qrcode, buy me a coffee!</p>
            </div>
          </div>
          <div class="donate-footer">
            <ul class="nav nav-tabs nav-justified" role="tablist">
              <li role="presentation" class="active">
                <a href="#alipay" id="alipay-tab" role="tab" data-toggle="tab" aria-controls="alipay" aria-expanded="true"><i class="icon icon-alipay"></i> alipay</a>
              </li>
              <li role="presentation" class="">
                <a href="#wechatpay" role="tab" id="wechatpay-tab" data-toggle="tab" aria-controls="wechatpay" aria-expanded="false"><i class="icon icon-wepay"></i> wechat payment</a>
              </li>
            </ul>
          </div>
        </div>
      </div>
    </div>
  </div>
</div>



</main>

  <footer class="footer" itemscope itemtype="http://schema.org/WPFooter">
	
	
    <ul class="social-links">
    	
        <li><a href="https://gitee.com/tina-yao" target="_blank" title="Gitee" data-toggle=tooltip data-placement=top><i class="icon icon-gitee"></i></a></li>
        
    </ul>

    <div class="copyright">
    	
        &copy; 2023 BigbigShark
        
        <div class="publishby">
        <!--
        	Theme by <a href="https://github.com/cofess" target="_blank"> cofess </a>base on <a href="https://github.com/cofess/hexo-theme-pure" target="_blank">pure</a>.
        -->
        </div>
    </div>
</footer>
  <script src="//cdn.jsdelivr.net/npm/jquery@1.12.4/dist/jquery.min.js"></script>
<script>
// Fallback for when the CDN copy of jQuery did not load: write a script tag
// for a site-hosted copy instead. The path is root-relative because a bare
// "js/jquery.min.js" would resolve against this post's own URL
// (/bigbig-shark/2022/08/15/...), not the site's script directory.
// NOTE(review): assumes jquery.min.js is deployed under /bigbig-shark/js/
// alongside the other local scripts — confirm.
window.jQuery || document.write('<script src="/bigbig-shark/js/jquery.min.js"><\/script>')
</script>

<script src="/bigbig-shark/js/plugin.min.js"></script>


<script src="/bigbig-shark/js/application.js"></script>


    <script>
// Publish the search settings as the global `INSIGHT_CONFIG`.
// Presumably read by the insight.js search script loaded after this block —
// verify against that script.
(function (root) {
    // Section labels shown for each kind of search result.
    var labels = {
        POSTS: 'Posts',
        PAGES: 'Pages',
        CATEGORIES: 'Categories',
        TAGS: 'Tags',
        UNTITLED: '(Untitled)'
    };

    root.INSIGHT_CONFIG = {
        TRANSLATION: labels,
        ROOT_URL: '/bigbig-shark/',
        CONTENT_URL: '/bigbig-shark/content.json'
    };
}(window));
</script>

<script src="/bigbig-shark/js/insight.js"></script>






   




   
    
  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  <script src="//cdn.jsdelivr.net/npm/valine"></script>
  <script type="text/javascript">
  // Field names that may be passed through to Valine's `meta` option.
  var GUEST = ['nick', 'mail', 'link'];
  // Configured field list; anything not listed in GUEST is dropped below.
  var meta = 'nick,mail,link';
  meta = meta.split(',').filter(function(item) {
    return GUEST.indexOf(item) > -1;
  });
  // Create the Valine comment widget on the #vcomments container.
  new Valine({
    el: '#vcomments',
    verify: false,
    notify: false,
    appId: '8pTCCvyyLGXskH0XW0fWHg7l-gzGzoHsz',
    appKey: 'id3rlCPRdbEoSpUfhJgcLhDO',
    placeholder: 'Just go go',
    avatar: 'mm',
    meta: meta,
    // Pass a number: the previous `'10' || 10` always evaluated to the
    // string '10', so the numeric fallback was never used.
    pageSize: 10,
    visitor: true
  });
  </script>

     







<script src="/bigbig-shark/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script>
<script>
// Initialise the Live2D widget with the tororo model definition and a
// 300x600 display area; logging and tag mode are switched off.
L2Dwidget.init({
  pluginModelPath: "assets/",
  model: {
    jsonPath: "/bigbig-shark/live2dw/assets/tororo.model.json"
  },
  display: {
    position: null,
    width: 300,
    height: 600
  },
  log: false,
  pluginJsPath: "lib/",
  pluginRootPath: "live2dw/",
  tagMode: false
});
</script>
</body>
</html>